Run UMAP on CD4+ events which express a COMPASS subset. Repeat for CD8 events.
The question being asked is “What are the memory and activation profiles of Ag-specific T cells?”

This time around, don’t sample the events.
Don’t stratify by groups, but rather color the sub-localization of the different markers.
Color by Cohort and Antigen (S1, S2, NCAP, VEMP, including DMSO)
Also color by
- Degree of functionality
- Cytokine - CD45RA
- CCR7
- HLA-DR
- CD38
Boxplots?

library(openCyto)
library(CytoML) # 1.12.0
library(flowCore) # required for description()
library(flowWorkspace) # required for gh_pop_get_data()
library(here)
library(tidyverse)
library(uwot)
library(ggplot2)
library(scales)
library(patchwork)
library(hues)
library(RColorBrewer)
library(ggrepel)
library(ggpubr)
library(tidyselect)
source(here::here("scripts/20200604_Helper_Functions.R")) # for distributeEvents() and sampleGatingHierarchy()
date <- 20200815
save_output <- TRUE
rerun_dimred <- FALSE

gs <- load_gs(here::here("out/GatingSets/20200815_HAARVI_ICS_GatingSet_AllBatches_with_COMPASS_Subsets"))
## loading R object...
## loading tree object...
## Done
gs2 <- subset(gs, !(`SAMPLE ID` %in% c("37C", "BWT23", "116C", "BWT22")) &
                !(`SAMPLE ID` == "551432" & STIM == "Spike 2"))
dput(gh_get_pop_paths(gs2))
## c("root", "/Time", "/Time/LD-3+", "/Time/LD-3+/1419-3+", "/Time/LD-3+/1419-3+/S", 
## "/Time/LD-3+/1419-3+/S/Lymph", "/Time/LD-3+/1419-3+/S/Lymph/4+", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/107a", "/Time/LD-3+/1419-3+/S/Lymph/4+/154", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CCR7+", "/Time/LD-3+/1419-3+/S/Lymph/4+/CD45RA+", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL2", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL4513", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/TNF", "/Time/LD-3+/1419-3+/S/Lymph/8+", 
## "/Time/LD-3+/1419-3+/S/Lymph/8+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/CD38+", 
## "/Time/LD-3+/1419-3+/S/Lymph/HLADR+", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+", 
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/107a", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/154", 
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/CCR7+", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/CD45RA+", 
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL2", 
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL4513", 
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/TNF", "/Time/LD-3+/1419-3+/S/Lymph/8+/107a", 
## "/Time/LD-3+/1419-3+/S/Lymph/8+/154", "/Time/LD-3+/1419-3+/S/Lymph/8+/IL2", 
## "/Time/LD-3+/1419-3+/S/Lymph/8+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/8+/IL4513", 
## "/Time/LD-3+/1419-3+/S/Lymph/8+/TNF", "/Time/LD-3+/1419-3+/S/Lymph/8+/CCR7+", 
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD45RA+", "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_NOT_154_AND_NOT_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_TNF", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_NOT_154_AND_NOT_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_IL4513_AND_NOT_TNF", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_NOT_154_AND_NOT_IFNG_AND_NOT_IL17_AND_IL2_AND_NOT_IL4513_AND_NOT_TNF", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_NOT_154_AND_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_NOT_TNF", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_154_AND_NOT_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_NOT_TNF", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_107a_AND_NOT_154_AND_NOT_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_NOT_TNF", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_NOT_154_AND_NOT_IFNG_AND_NOT_IL17_AND_IL2_AND_IL4513_AND_NOT_TNF", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_NOT_154_AND_NOT_IFNG_AND_IL17_AND_NOT_IL2_AND_IL4513_AND_NOT_TNF", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_NOT_154_AND_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_TNF", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_154_AND_NOT_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_TNF", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_154_AND_NOT_IFNG_AND_NOT_IL17_AND_IL2_AND_NOT_IL4513_AND_NOT_TNF", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_154_AND_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_NOT_TNF", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_NOT_154_AND_IFNG_AND_NOT_IL17_AND_IL2_AND_NOT_IL4513_AND_TNF", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_NOT_154_AND_IFNG_AND_NOT_IL17_AND_IL2_AND_IL4513_AND_NOT_TNF", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_154_AND_NOT_IFNG_AND_NOT_IL17_AND_IL2_AND_NOT_IL4513_AND_TNF", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_154_AND_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_TNF", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_154_AND_IFNG_AND_NOT_IL17_AND_IL2_AND_NOT_IL4513_AND_NOT_TNF", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_154_AND_NOT_IFNG_AND_NOT_IL17_AND_IL2_AND_IL4513_AND_TNF", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_154_AND_IFNG_AND_NOT_IL17_AND_IL2_AND_NOT_IL4513_AND_TNF", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_154_AND_IFNG_AND_NOT_IL17_AND_IL2_AND_IL4513_AND_TNF", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_107a_AND_154_AND_IFNG_AND_NOT_IL17_AND_IL2_AND_NOT_IL4513_AND_TNF", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_COMPASS_Subsets", "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_NOT_107a_AND_NOT_154_AND_NOT_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_IL4513_AND_NOT_TNF", 
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_NOT_107a_AND_NOT_154_AND_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_NOT_TNF", 
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_107a_AND_NOT_154_AND_NOT_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_NOT_TNF", 
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_NOT_107a_AND_NOT_154_AND_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_TNF", 
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_NOT_107a_AND_NOT_154_AND_IFNG_AND_NOT_IL17_AND_IL2_AND_NOT_IL4513_AND_NOT_TNF", 
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_107a_AND_NOT_154_AND_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_NOT_TNF", 
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_107a_AND_NOT_154_AND_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_TNF", 
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_COMPASS_Subsets", "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_COMPASS_Subsets/Naive", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_COMPASS_Subsets/TCM", "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_COMPASS_Subsets/TEMRA", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_COMPASS_Subsets/TEM", "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_COMPASS_Subsets/Naive", 
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_COMPASS_Subsets/TCM", "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_COMPASS_Subsets/TEMRA", 
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_COMPASS_Subsets/TEM", "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_COMPASS_Subsets/HLADR+CD38+", 
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_COMPASS_Subsets/HLADR+CD38+"
## )

CD4

Extract mfi data

cd4_gates_for_dimred <- c(
  "/Time/LD-3+/1419-3+/S/Lymph/4+/107a", "/Time/LD-3+/1419-3+/S/Lymph/4+/154", 
  "/Time/LD-3+/1419-3+/S/Lymph/4+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL2", 
  "/Time/LD-3+/1419-3+/S/Lymph/4+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL4513", 
  "/Time/LD-3+/1419-3+/S/Lymph/4+/TNF",
  "/Time/LD-3+/1419-3+/S/Lymph/4+/CCR7+", "/Time/LD-3+/1419-3+/S/Lymph/4+/CD45RA+",
  "/Time/LD-3+/1419-3+/S/Lymph/CD38+", "/Time/LD-3+/1419-3+/S/Lymph/HLADR+")
cd4_cytokine_gates <- c("/Time/LD-3+/1419-3+/S/Lymph/4+/107a", "/Time/LD-3+/1419-3+/S/Lymph/4+/154", 
                        "/Time/LD-3+/1419-3+/S/Lymph/4+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL2", 
                        "/Time/LD-3+/1419-3+/S/Lymph/4+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL4513", 
                        "/Time/LD-3+/1419-3+/S/Lymph/4+/TNF")
cd4_compass_subsets_parentGate <- "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_COMPASS_Subsets"

pop_counts <- pData(gs2) %>% 
  left_join(gs_pop_get_count_fast(gs2, subpopulations = cd4_compass_subsets_parentGate),
            by = c("rowname" = "name")) %>% 
  dplyr::rename(CD4_COMPASS_Subsets = Count) %>% 
  dplyr::select(rowname, Batch, "SAMPLE ID", STIM, Cohort, CD4_COMPASS_Subsets) %>% 
  dplyr::filter(!(Cohort %in% c(NA, "Healthy control", "Healthy control 2017-2018")) & STIM != "SEB")

Keep in mind that there is lopsided patient and group representation simply due to not sampling:

cd4_compass_subsets_sampleSizes_4plot <- pop_counts %>%
  mutate(Cohort = factor(Cohort,
                         levels = c("Non-hospitalized", "Hospitalized"),
                         labels = c("Conv\nNon-Hosp", "Conv\nHosp")))
ggplot(cd4_compass_subsets_sampleSizes_4plot,
       aes(factor(Cohort), CD4_COMPASS_Subsets)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.15, height = 0) +
  theme_bw(base_size=20) +
  labs(title="ICS CD4 UMAP patient representation",
       y="CD4+ COMPASS Subset+ Events\n for Dimensionality Reduction\n(not sampled)") +
  facet_grid(Batch ~ STIM) +
  theme(axis.title.x = element_blank())

# Extract data for dimensionality reduction (not actually sampling)
call_sampleGatingHierarchy_for_cd4 <- function(currentSampleName) {
  # print(sprintf("Sampling data from %s", currentSampleName))
  sampleGatingHierarchy(gs2[[currentSampleName]], cd4_compass_subsets_parentGate, n = NULL, otherGates = cd4_gates_for_dimred)
}
cd4_compass_subsets_data <- map_dfr(pop_counts$rowname, call_sampleGatingHierarchy_for_cd4)
dim(cd4_compass_subsets_data)
## [1] 119921     55
knitr::kable(head(cd4_compass_subsets_data))
name EXPERIMENT NAME $DATE SAMPLE ID PATIENT ID STIM WELL ID PLATE NAME filename rowname Sample ID Collection date Cell count Cohort Age Sex Race Hispanic? Days symptom onset to visit 1 Pair ID Race_v2 Batch /Time/LD-3+/1419-3+/S/Lymph/4+/CD4_COMPASS_Subsets /Time/LD-3+/1419-3+/S/Lymph/4+/107a /Time/LD-3+/1419-3+/S/Lymph/4+/154 /Time/LD-3+/1419-3+/S/Lymph/4+/IFNG /Time/LD-3+/1419-3+/S/Lymph/4+/IL2 /Time/LD-3+/1419-3+/S/Lymph/4+/IL17 /Time/LD-3+/1419-3+/S/Lymph/4+/IL4513 /Time/LD-3+/1419-3+/S/Lymph/4+/TNF /Time/LD-3+/1419-3+/S/Lymph/4+/CCR7+ /Time/LD-3+/1419-3+/S/Lymph/4+/CD45RA+ /Time/LD-3+/1419-3+/S/Lymph/CD38+ /Time/LD-3+/1419-3+/S/Lymph/HLADR+ Time FSC-A FSC-H SSC-A SSC-H CD8b TNFa CD107a CD154 CD3 ECD IL2 CD4 IL17a IL4/5/13 CD14/CD19 CCR7 CD38 L/D IFNg CD45RA HLADR
112405.fcs 20200528_COVID_ICS-B1 28-MAY-2020 15545 15545 DMSO A02 P2 15545_A2_A02_002.fcs 112405.fcs_332150 15545 2020-04-24 N/A Hospitalized 63 F Asian N/A 47 7 Asian 1 1 0 0 0 0 0 0 1 0 0 0 0 1.502 115906.84 96585 27485.91 27497 1064.57202 1440.45300 393.4271 648.1245 2909.432 619.5652 1705.126 861.0718 777.4784 1025.7701 625.7065 631.8912 1210.5221 965.0119 1267.6622 499.4432
112405.fcs 20200528_COVID_ICS-B1 28-MAY-2020 15545 15545 DMSO A02 P2 15545_A2_A02_002.fcs 112405.fcs_332150 15545 2020-04-24 N/A Hospitalized 63 F Asian N/A 47 7 Asian 1 1 0 0 0 0 0 1 0 1 1 0 0 2.011 50676.04 36166 13068.27 12006 337.69443 592.89911 446.3646 816.1756 2913.806 592.0229 1654.902 486.0011 2992.3889 979.4083 2385.1418 1194.2875 716.8176 820.3757 2494.2458 561.7028
112405.fcs 20200528_COVID_ICS-B1 28-MAY-2020 15545 15545 DMSO A02 P2 15545_A2_A02_002.fcs 112405.fcs_332150 15545 2020-04-24 N/A Hospitalized 63 F Asian N/A 47 7 Asian 1 1 0 0 0 0 0 0 1 1 0 0 0 2.277 95741.76 77970 31374.81 30438 404.73019 2342.62720 421.5775 1543.7673 2929.278 1151.6838 1943.653 1273.3855 859.4286 847.0671 1992.1693 1230.5979 1355.7234 390.6719 1200.6874 458.7832
112405.fcs 20200528_COVID_ICS-B1 28-MAY-2020 15545 15545 DMSO A02 P2 15545_A2_A02_002.fcs 112405.fcs_332150 15545 2020-04-24 N/A Hospitalized 63 F Asian N/A 47 7 Asian 1 1 0 0 0 1 0 0 0 1 1 0 0 2.419 91918.96 80343 12862.08 12647 -24.24598 260.90768 391.5533 1706.6123 3063.037 1603.8656 1705.122 963.4144 852.1685 751.9517 2187.2285 1580.1456 1159.5941 751.5898 2670.7056 855.4450
112405.fcs 20200528_COVID_ICS-B1 28-MAY-2020 15545 15545 DMSO A02 P2 15545_A2_A02_002.fcs 112405.fcs_332150 15545 2020-04-24 N/A Hospitalized 63 F Asian N/A 47 7 Asian 1 1 0 0 1 0 0 0 0 1 0 0 0 2.831 108113.04 96344 13686.84 13304 1261.14404 83.61147 786.2542 477.3380 2977.775 295.5194 1813.501 594.5399 631.5034 1235.4028 1751.8550 778.6345 1021.9509 2112.5845 1752.8688 682.8989
112405.fcs 20200528_COVID_ICS-B1 28-MAY-2020 15545 15545 DMSO A02 P2 15545_A2_A02_002.fcs 112405.fcs_332150 15545 2020-04-24 N/A Hospitalized 63 F Asian N/A 47 7 Asian 1 1 0 1 0 1 0 0 1 1 0 0 0 3.067 72211.40 57974 22689.60 21602 40.91488 2249.43335 788.5528 1994.1859 3052.448 2396.2529 1849.460 1073.7261 609.6157 651.3246 2341.2646 1157.6080 1250.0403 234.1040 949.8956 706.2128

Is there maybe one cytokine that is dominating the entire sample?
If one cytokine has very high background expression (and a generous gate), it could be gated positive in a lot of events.
The high number of events expressing this cytokine could lead to it dominating the data, so that most sampled events are positive for this noisy cytokine. It would drown out real signal from other cytokines.

cytokine_dominance <- cd4_compass_subsets_data %>% 
  group_by(Batch) %>% 
  summarise_at(cd4_cytokine_gates, sum) %>%
  t() %>%
  as.data.frame() %>%
  set_names(c("B1", "B2", "B3")) %>%
  rownames_to_column("Cytokine_Gate") %>%
  dplyr::filter(Cytokine_Gate != "Batch")
knitr::kable(cytokine_dominance)
Cytokine_Gate B1 B2 B3
/Time/LD-3+/1419-3+/S/Lymph/4+/107a 10571 15787 12987
/Time/LD-3+/1419-3+/S/Lymph/4+/154 10647 12296 12535
/Time/LD-3+/1419-3+/S/Lymph/4+/IFNG 7346 4490 5410
/Time/LD-3+/1419-3+/S/Lymph/4+/IL2 10328 8292 9823
/Time/LD-3+/1419-3+/S/Lymph/4+/IL17 606 746 1275
/Time/LD-3+/1419-3+/S/Lymph/4+/IL4513 6356 9478 10858
/Time/LD-3+/1419-3+/S/Lymph/4+/TNF 11204 11027 14674
cytokine_dominance %>%
  pivot_longer(cols = starts_with("B"), names_to = "Batch", values_to = "Events_in_Gate") %>% 
  mutate(Cytokine = sub(".*4\\+\\/(.*)", "\\1", Cytokine_Gate)) %>% 
  ggplot(aes(Cytokine, Events_in_Gate, fill = Cytokine)) +
  theme_bw(base_size=18) +
  geom_bar(stat="identity") +
  facet_grid(. ~ Batch) +
  labs(title = "CD4 Run Cytokine Dominance by Batch")

Perform Dimensionality Reduction

cols_4_dimred <- c("CD3 ECD", "CD8b", "CD4",
                  "TNFa", "CD107a", 
                  "CD154", "IL2", "IL17a", 
                  "IL4/5/13", "IFNg", 
                  "CCR7", "CD45RA",
                  "CD38", "HLADR")
cd4.scaled_dimred_input <- cd4_compass_subsets_data %>% 
  dplyr::select(Batch, all_of(cols_4_dimred)) %>% 
  group_by(Batch) %>% 
  nest() %>% 
  ungroup() %>% 
  mutate(data = lapply(data, function(df) {as.data.frame(scale(as.matrix(df)))})) %>% 
  unnest(cols = c(data)) %>% 
  rename_at(vars(all_of(cols_4_dimred)),function(x) paste0(x,".scaled")) %>% 
  dplyr::select(-Batch)

cd4_compass_subsets_data <- cbind(cd4_compass_subsets_data, cd4.scaled_dimred_input)

# UMAP can take a long time, so there is a rerun_dimred switch
if(rerun_dimred) {
  print("Running UMAP")
  set.seed(date)
  print(Sys.time())
  cd4_compass_subsets_dimred_out <- cd4_compass_subsets_data %>% 
    # Run CD3, co-receptor, cytokine, memory, and activation markers through UMAP
    dplyr::select(all_of(paste0(cols_4_dimred, ".scaled"))) %>% 
    uwot::umap(spread = 9, min_dist = 0.02, n_threads = 7)
  print(Sys.time())
  cd4_compass_subsets_w_umap <- cbind(as.data.frame(cd4_compass_subsets_dimred_out) %>% 
    dplyr::rename(x.umap = V1, y.umap = V2),
    cd4_compass_subsets_data)
  if(save_output) {
    saveRDS(cd4_compass_subsets_w_umap, here::here(sprintf("out/UMAP/%s_ICS_CD4_COMPASS_Subsets_UMAP_Unsampled.rds", date)))
  }
} else {
  # Assuming UMAP results are already saved
  print("Loading saved UMAP run")
  cd4_compass_subsets_w_umap <- readRDS(here::here(sprintf("out/UMAP/%s_ICS_CD4_COMPASS_Subsets_UMAP_Unsampled.rds", date)))
}
## [1] "Loading saved UMAP run"

Plot UMAP results

Shuffle data frame rows so e.g. Batch 3 doesn’t dominate foreground

set.seed(date)
cd4_compass_subsets_w_umap <- cd4_compass_subsets_w_umap[sample(nrow(cd4_compass_subsets_w_umap), nrow(cd4_compass_subsets_w_umap)),]
# Arial font setup. Downloaded afms from https://github.com/microsoft/microsoft-r-open/tree/ec3fd89e5fb5794bd8149905c134ad801bb61800
Arial <- Type1Font(family = "Arial",
                   metrics = c(here::here("data/Arial_afm/ArialMT.afm"),
                               here::here("data/Arial_afm/ArialMT-Bold.afm"), 
                               here::here("data/Arial_afm/ArialMT-Italic.afm"),
                               here::here("data/Arial_afm/ArialMT-BoldItalic.afm")))
pdfFonts(Arial = Arial)

boolColorScheme <- c("FALSE" = "#E2E2E2", "TRUE" = "#023FA5")

cd4_compass_subsets_w_umap <- cd4_compass_subsets_w_umap %>% 
  mutate(cytokine_degree = rowSums(dplyr::select(., all_of(cd4_cytokine_gates))))
cd4_compass_subsets_w_umap <- cd4_compass_subsets_w_umap %>% 
  mutate(Cohort = factor(Cohort, levels = c("Non-hospitalized", "Hospitalized")),
         STIM = factor(STIM, levels = c("DMSO", "Spike 1", "Spike 2", "NCAP", "VEMP")))

stim_labs <- c("DMSO", "S1", "S2", "NCAP", "VEMP") 
names(stim_labs) <- c("DMSO", "Spike 1", "Spike 2", "NCAP", "VEMP")
cohort_labs <- c("Conv\nNon-Hosp", "Conv\nHosp")
names(cohort_labs) <- c("Non-hospitalized", "Hospitalized")

base_dimred_plot <- function(currentColumn, pointSize = 0.02, colorScheme = NA) {
  p <- ggplot(cd4_compass_subsets_w_umap, aes(x=x.umap, y=y.umap,
                                       colour=if(currentColumn %in% c("Batch", "SAMPLE ID")) {
                                         factor(!!as.name(currentColumn))
                                         } else {
                                           as.logical(!!as.name(currentColumn))
                                           })) +
    geom_point(shape=20, alpha=0.8, size=pointSize) +
    facet_grid(Cohort ~ STIM, switch="y",
               labeller = labeller(Cohort = cohort_labs, STIM = stim_labs)) +
    theme_bw() +
    theme(plot.title=element_text(hjust = 0.5, size=22, face="bold"),
          axis.text=element_blank(),
          axis.line=element_blank(),
          axis.ticks=element_blank(),
          axis.title=element_blank(),
          strip.text=element_text(face="bold", size=22),
          panel.grid.major = element_blank(),
          legend.title=element_text(face="bold", size=14),
          strip.text.x = element_text(margin = margin(0.15,0,0.15,0, "cm")))
  if(!anyNA(colorScheme)) {
    p <- p + scale_color_manual(values = colorScheme)
  }
  if(currentColumn %in% c("Batch", "SAMPLE ID")) {
    p <- p + labs(color = currentColumn) +
      guides(colour = guide_legend(override.aes = list(size=10))) +
      theme(legend.title = element_text(size=15),
            legend.text = element_text(size=15),
            legend.position = "bottom") +
      scale_colour_manual(values=as.character(iwanthue(length(unique(cd4_compass_subsets_w_umap[,currentColumn])))))
  } else {
    p <- p + theme(legend.position = "none")
  }
  p
}

Look for Batch and Patient sub-clustering

base_dimred_plot("Batch")

base_dimred_plot("SAMPLE ID")

Cytokine, Memory, and Activation expression localization

Now visualize the cytokine, memory, and activation expression localization

for(cg in cd4_gates_for_dimred) {
  print(base_dimred_plot(cg, colorScheme = boolColorScheme) +
    labs(title = sprintf("CD4+ COMPASS Subset+ UMAP\nColored by %s", sub(".*\\/([^(\\/)]+)", "\\1", cg))))
}

Color by degree

table(cd4_compass_subsets_w_umap$cytokine_degree)
## 
##     1     2     3     4     5 
## 85294 11137 14998  8286   206
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc <- scale_colour_gradientn(colours = myPalette(100), limits=c(1, 6))

currentColumn <- "cytokine_degree"
pointSize <- 0.02
ggplot(cd4_compass_subsets_w_umap, aes(x=x.umap, y=y.umap, colour=!!as.name(currentColumn))) +
  geom_point(shape=20, alpha=0.8, size=pointSize) +
  facet_grid(Cohort ~ STIM, switch="y",
             labeller = labeller(Cohort = cohort_labs, STIM = stim_labs)) +
  labs(colour="Cytokine\nDegree",
       title="CD4+ COMPASS Subset+ UMAP\nColored by Cytokine Polyfunctionality") +
  theme_bw() +
  theme(plot.title=element_text(hjust = 0.5, size=22, face="bold"),
        axis.text=element_blank(),
        axis.line=element_blank(),
        axis.ticks=element_blank(),
        axis.title=element_blank(),
        strip.text=element_text(face="bold", size=22),
        panel.grid.major = element_blank(),
        legend.title=element_text(face="bold", size=15),
        legend.text = element_text(size=13),
        strip.text.x = element_text(margin = margin(0.15,0,0.15,0, "cm")),
        legend.position = "bottom") +
  sc

Unstratified UMAP plots

pointSize <- 0.02
# Theme settings
# Expand axis limits so contour lines don't get cut off near borders
gg_themes <- ggplot(cd4_compass_subsets_w_umap, aes(x=x.umap, y=y.umap)) +
  scale_x_continuous(limits=range(cd4_compass_subsets_w_umap$x.umap)*1.1) +
  scale_y_continuous(limits=range(cd4_compass_subsets_w_umap$y.umap)*1.1) +
  theme_bw() +
  theme(plot.title=element_text(hjust = 0.5, size=22, face="bold"),
        axis.text=element_blank(),
        axis.line=element_blank(),
        axis.ticks=element_blank(),
        axis.title=element_blank(),
        panel.grid = element_blank(),
        legend.position = "none",
        plot.margin = margin(1, 0, 0, 0, unit = "pt"))
# Hospitalization status contour plots
hosp_contour_plot <- gg_themes +
  geom_point(shape=20, alpha=0.8, size=pointSize, colour="#E2E2E2") +
  geom_density_2d(data=cd4_compass_subsets_w_umap %>% dplyr::filter(Cohort == "Hospitalized"),
                  colour="#363636", bins=12) + # "red"
  ggtitle("Hosp")

nonhosp_contour_plot <- gg_themes +
  geom_point(shape=20, alpha=0.8, size=pointSize, colour="#E2E2E2") +
  geom_density_2d(data=cd4_compass_subsets_w_umap %>% dplyr::filter(Cohort == "Non-hospitalized"),
                  colour="#363636", bins=12) + # "blue"
  ggtitle("Not-Hosp")
# Scaled mfi expression plots for memory, activation, and cytokine markers 
mfi_col_text_4plot <- c("TNFa" = "TNF", "CD107a" = "CD107", 
                  "CD154" = "CD154", "IL2" = "IL-2", "IL17a" = "IL17a", 
                  "IL4/5/13" = "IL-4/5/13", "IFNg" = "IFN-γ", 
                  "CCR7" = "CCR7", "CD45RA" = "CD45RA",
                  "CD38" = "CD38", "HLADR" = "HLA-DR")
plot_scaled_mfi_v2 <- function(currentColumn) {
  myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
  sc_mfi <- scale_colour_gradientn(colours = myPalette(100), oob=squish, limits=c(-2.5, 2.5))
  gg_themes +
    geom_point(aes(colour=cd4_compass_subsets_w_umap[,paste0(currentColumn, ".scaled")]),
               shape=20, alpha=0.8, size=pointSize) + # !!as.name(paste0(currentColumn, ".scaled")) doesn't work
    sc_mfi +
    ggtitle(mfi_col_text_4plot[[currentColumn]])
}

unstratified_mfi_plot_cols <- c("TNFa", "CD107a", 
                  "CD154", "IL2", "IL17a", 
                  "IL4/5/13", "IFNg", 
                  "CCR7", "CD45RA",
                  "CD38", "HLADR")
mfi_plots_unstrat <- lapply(unstratified_mfi_plot_cols, plot_scaled_mfi_v2)
names(mfi_plots_unstrat) <- unstratified_mfi_plot_cols
# Memory UMAP plot colored by gate membership (TEMRA, TEM, TCM, Naive)
getMemoryCategory <- function(CD45RA, CCR7) {
  if(CD45RA & CCR7) {
    "Naive"
  } else if(CD45RA & !CCR7) {
    "TEMRA"
  } else if(!CD45RA & CCR7) {
    "TCM"
  } else if(!CD45RA & !CCR7) {
    "TEM"
  } else {
    "Uncategorized"
  }
}

cd4_compass_subsets_w_umap$MemoryCategory <- pmap_chr(.l = list(cd4_compass_subsets_w_umap$`/Time/LD-3+/1419-3+/S/Lymph/4+/CD45RA+`,
                                                               cd4_compass_subsets_w_umap$`/Time/LD-3+/1419-3+/S/Lymph/4+/CCR7+`),
                                                     .f = getMemoryCategory)

myPalette(4)
## [1] "#5E4FA2" "#BEE4A0" "#FDBE6F" "#9E0142"
scales::show_col(myPalette(4))

memColorScheme <- c("TEM" = "#9E0142", "TEMRA" = "#BEE4A0", "TCM" = "#5E4FA2", "Naive" = "#FDBE6F")
mem_plot_colored_by_gate <- gg_themes +
  geom_point(aes(colour=cd4_compass_subsets_w_umap[,"MemoryCategory"]),
               shape=20, alpha=0.8, size=pointSize) +
  ggtitle("Memory") +
  scale_color_manual(values = memColorScheme)
# + theme(legend.position = "right") + guides(colour = guide_legend(override.aes = list(size=9)))

mem_plot_colored_by_gate

# Activation plots: color by gate membership, one plot per marker
boolColorScheme <- c("0" = "#E2E2E2", "1" = "#363636")
hladr_bool_plot <- gg_themes +
  geom_point(aes(colour=factor(cd4_compass_subsets_w_umap[,"/Time/LD-3+/1419-3+/S/Lymph/HLADR+"], levels = c(1, 0))),
               shape=20, alpha=0.8, size=pointSize) +
  scale_color_manual(values = boolColorScheme, labels=c("1"="Expressed", "0"="Not Expressed")) +
  ggtitle("HLA-DR")
cd38_bool_plot <- gg_themes +
  geom_point(aes(colour=factor(cd4_compass_subsets_w_umap[,"/Time/LD-3+/1419-3+/S/Lymph/CD38+"], levels = c(1, 0))),
               shape=20, alpha=0.8, size=pointSize) +
  scale_color_manual(values = boolColorScheme, labels=c("1"="Expressed", "0"="Not Expressed")) +
  ggtitle("CD38")
# factor_colors = hsv((seq(0, 1, length.out = 4 + 1)[-1] +
#                          0.2)%%1, 0.7, 0.95)
# 
# stim_colors <- c("Spike 1" = "#fc68be", "Spike 2" = "#d94e09", "NCAP" = "#6B49F2", "VEMP" = "#D0F249", "DMSO" = "#91570a") # heatmap colors
# stim_colors <- c("Spike 1" = "#49F2BF", "Spike 2" = "#F2497C", "NCAP" = "#6B49F2", "VEMP" = "#D0F249", "DMSO" = "#91570a")
stim_abbreviations = c("Spike 1" = "S1", "Spike 2" = "S2", "NCAP" = "NCAP", "VEMP" = "VEMP", "DMSO" = "DMSO")

plot_stim_contour_plot <- function(currentStim) {
  gg_themes +
    geom_point(shape=20, alpha=0.8, size=pointSize, colour="#E2E2E2") +
    geom_density_2d(data=cd4_compass_subsets_w_umap %>% dplyr::filter(STIM == currentStim),
                    colour="#363636", bins=12) + # stim_colors[[currentStim]]
    ggtitle(stim_abbreviations[[currentStim]])
}

stims <- c("DMSO", "Spike 1", "Spike 2", "NCAP", "VEMP")
stim_plots <- lapply(stims, plot_stim_contour_plot)
names(stim_plots) <- stims
# Unstratified PolyF plot
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc_polyf <- scale_colour_gradientn(colours = myPalette(100), limits=c(1, 6))
polyf_plot_unstrat <- gg_themes +
  geom_point(aes(colour=cd4_compass_subsets_w_umap[,"cytokine_degree"]), shape=20, alpha=0.8, size=pointSize) +
  sc_polyf +
  ggtitle("PolyF")
# Extract legends
mfi_legend <- as_ggplot(get_legend(mfi_plots_unstrat$TNFa +
                                     theme(legend.position="bottom",
                                             legend.title.align = 0.5) +
                                     labs(colour="Scaled Expression") +
                                     guides(colour=guide_colorbar(title.position = "top"))))
polyf_legend <- as_ggplot(get_legend(polyf_plot_unstrat +
                                       theme(legend.position="bottom",
                                             legend.title.align = 0.5) +
                                       labs(colour="PolyF") +
                                       guides(colour=guide_colorbar(title.position = "top"))))
mem_legend <- as_ggplot(get_legend(mem_plot_colored_by_gate +
                                     theme(legend.position = "right",
                                           legend.title.align = 0.5) +
                                     guides(colour = guide_legend(override.aes = list(size=9))) +
                                     labs(colour="Memory")))
bool_legend <- as_ggplot(get_legend(hladr_bool_plot +
                                     theme(legend.position = "right",
                                           legend.title.align = 0.5) +
                                     guides(colour = guide_legend(override.aes = list(size=9))) +
                                     labs(colour="Gate Membership")))

Put it all together

print(
  # Top row
  (hosp_contour_plot | nonhosp_contour_plot | plot_spacer() |
  stim_plots$`Spike 1` | stim_plots$`Spike 2` | plot_spacer() |
  polyf_plot_unstrat | plot_spacer() | mfi_plots_unstrat$IFNg) /
  # Second row
  (mem_plot_colored_by_gate | plot_spacer() | plot_spacer() |
  stim_plots$NCAP | stim_plots$VEMP | plot_spacer() |
  mfi_plots_unstrat$TNFa | mfi_plots_unstrat$IL2 | mfi_plots_unstrat$`IL4/5/13`) /
  # Third row
  (hladr_bool_plot | cd38_bool_plot | plot_spacer() |
  stim_plots$DMSO | plot_spacer() | plot_spacer() |
  mfi_plots_unstrat$IL17a | mfi_plots_unstrat$CD107a | mfi_plots_unstrat$CD154 )
)

if(save_output) {
  png(file=here::here(sprintf("out/UMAP/%s_CD4_COMPASS_Subsets_UMAP.png",
                              date)),
      width=18, height=9, units="in", res=300)
    print(
      # Top row
      (hosp_contour_plot | nonhosp_contour_plot | plot_spacer() |
      stim_plots$`Spike 1` | stim_plots$`Spike 2` | plot_spacer() |
      polyf_plot_unstrat | plot_spacer() | mfi_plots_unstrat$IFNg) /
      # Second row
      (mem_plot_colored_by_gate | plot_spacer() | plot_spacer() |
      stim_plots$NCAP | stim_plots$VEMP | plot_spacer() |
      mfi_plots_unstrat$TNFa | mfi_plots_unstrat$IL2 | mfi_plots_unstrat$`IL4/5/13`) /
      # Third row
      (hladr_bool_plot | cd38_bool_plot | plot_spacer() |
      stim_plots$DMSO | plot_spacer() | plot_spacer() |
      mfi_plots_unstrat$IL17a | mfi_plots_unstrat$CD107a | mfi_plots_unstrat$CD154 )
    )
  dev.off()
  
  png(file=here::here(sprintf("out/UMAP/%s_CD4_COMPASS_Subsets_UMAP_MFI_Legend.png",
                            date)),
    width=3, height=1.5, units="in", res=300)
  print(mfi_legend)
  dev.off()
  
  png(file=here::here(sprintf("out/UMAP/%s_CD4_COMPASS_Subsets_UMAP_PolyF_Legend.png",
                            date)),
    width=3, height=1.5, units="in", res=300)
  print(polyf_legend)
  dev.off()
  
  png(file=here::here(sprintf("out/UMAP/%s_CD4_COMPASS_Subsets_UMAP_Memory_Legend.png",
                            date)),
    width=2, height=3, units="in", res=300)
  print(mem_legend)
  dev.off()
  
  png(file=here::here(sprintf("out/UMAP/%s_CD4_COMPASS_Subsets_UMAP_GateMembership_Legend.png",
                          date)),
  width=2, height=3, units="in", res=300)
  print(bool_legend)
  dev.off()
  
  # cairo_pdf(file=here::here(sprintf("out/UMAP/%s_CD4_COMPASS_Subsets_UMAP.cairo.pdf",
  #                             date)),
  #     width=12, height=9, onefile = TRUE, bg = "transparent", family = "Arial")
  # print(x)
  # dev.off()
}
## png 
##   2

CD8

# Drop certain wells for just the CD8 runs due to low CD8 count:
gs3 <- subset(gs2, !(STIM %in% c("Spike 2", "NCAP") & `SAMPLE ID` %in% c("BWT20", "15548")) &
         !(STIM == "Spike 2" & `SAMPLE ID` == "15530"))

Extract mfi data

cd8_gates_for_dimred <- c(
  "/Time/LD-3+/1419-3+/S/Lymph/8+/107a", "/Time/LD-3+/1419-3+/S/Lymph/8+/154", 
  "/Time/LD-3+/1419-3+/S/Lymph/8+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/8+/IL2", 
  "/Time/LD-3+/1419-3+/S/Lymph/8+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/8+/IL4513", 
  "/Time/LD-3+/1419-3+/S/Lymph/8+/TNF",
  "/Time/LD-3+/1419-3+/S/Lymph/8+/CCR7+", "/Time/LD-3+/1419-3+/S/Lymph/8+/CD45RA+",
  "/Time/LD-3+/1419-3+/S/Lymph/CD38+", "/Time/LD-3+/1419-3+/S/Lymph/HLADR+")
cd8_cytokine_gates <- c("/Time/LD-3+/1419-3+/S/Lymph/8+/107a", "/Time/LD-3+/1419-3+/S/Lymph/8+/154", 
                        "/Time/LD-3+/1419-3+/S/Lymph/8+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/8+/IL2", 
                        "/Time/LD-3+/1419-3+/S/Lymph/8+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/8+/IL4513", 
                        "/Time/LD-3+/1419-3+/S/Lymph/8+/TNF")
cd8_compass_subsets_parentGate <- "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_COMPASS_Subsets"

pop_counts <- pData(gs3) %>% 
  left_join(gs_pop_get_count_fast(gs3, subpopulations = cd8_compass_subsets_parentGate),
            by = c("rowname" = "name")) %>% 
  dplyr::rename(CD8_COMPASS_Subsets = Count) %>% 
  dplyr::select(rowname, Batch, "SAMPLE ID", STIM, Cohort, CD8_COMPASS_Subsets) %>% 
  dplyr::filter(!(Cohort %in% c(NA, "Healthy control", "Healthy control 2017-2018")) & STIM != "SEB")

Keep in mind that there is lopsided patient and group representation simply due to not sampling:

cd8_compass_subsets_sampleSizes_4plot <- pop_counts %>%
  mutate(Cohort = factor(Cohort,
                         levels = c("Non-hospitalized", "Hospitalized"),
                         labels = c("Conv\nNon-Hosp", "Conv\nHosp")))
ggplot(cd8_compass_subsets_sampleSizes_4plot,
       aes(factor(Cohort), CD8_COMPASS_Subsets)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.15, height = 0) +
  theme_bw(base_size=20) +
  labs(title="ICS CD8 UMAP patient representation",
       y="CD8+ COMPASS Subset+ Events\n for Dimensionality Reduction\n(not sampled)") +
  facet_grid(Batch ~ STIM) +
  theme(axis.title.x = element_blank())

# Extract data for dimensionality reduction (not actually sampling)
call_sampleGatingHierarchy_for_cd8 <- function(currentSampleName) {
  # print(sprintf("Sampling data from %s", currentSampleName))
  sampleGatingHierarchy(gs3[[currentSampleName]], cd8_compass_subsets_parentGate, n = NULL, otherGates = cd8_gates_for_dimred)
}
cd8_compass_subsets_data <- map_dfr(pop_counts$rowname, call_sampleGatingHierarchy_for_cd8)
dim(cd8_compass_subsets_data)
## [1] 18503    55
knitr::kable(head(cd8_compass_subsets_data))
name EXPERIMENT NAME $DATE SAMPLE ID PATIENT ID STIM WELL ID PLATE NAME filename rowname Sample ID Collection date Cell count Cohort Age Sex Race Hispanic? Days symptom onset to visit 1 Pair ID Race_v2 Batch /Time/LD-3+/1419-3+/S/Lymph/8+/CD8_COMPASS_Subsets /Time/LD-3+/1419-3+/S/Lymph/8+/107a /Time/LD-3+/1419-3+/S/Lymph/8+/154 /Time/LD-3+/1419-3+/S/Lymph/8+/IFNG /Time/LD-3+/1419-3+/S/Lymph/8+/IL2 /Time/LD-3+/1419-3+/S/Lymph/8+/IL17 /Time/LD-3+/1419-3+/S/Lymph/8+/IL4513 /Time/LD-3+/1419-3+/S/Lymph/8+/TNF /Time/LD-3+/1419-3+/S/Lymph/8+/CCR7+ /Time/LD-3+/1419-3+/S/Lymph/8+/CD45RA+ /Time/LD-3+/1419-3+/S/Lymph/CD38+ /Time/LD-3+/1419-3+/S/Lymph/HLADR+ Time FSC-A FSC-H SSC-A SSC-H CD8b TNFa CD107a CD154 CD3 ECD IL2 CD4 IL17a IL4/5/13 CD14/CD19 CCR7 CD38 L/D IFNg CD45RA HLADR
112405.fcs 20200528_COVID_ICS-B1 28-MAY-2020 15545 15545 DMSO A02 P2 15545_A2_A02_002.fcs 112405.fcs_332150 15545 2020-04-24 N/A Hospitalized 63 F Asian N/A 47 7 Asian 1 1 0 0 0 0 0 1 0 1 1 0 0 13.62000 128749.32 109320 22627.83 21960 1908.852 709.8051 164.8308 966.8429 2795.675 415.6850 867.7651 969.8013 1819.7448 847.6880 1601.144 1112.540 1136.316 729.8315 2262.386 538.4516
112405.fcs 20200528_COVID_ICS-B1 28-MAY-2020 15545 15545 DMSO A02 P2 15545_A2_A02_002.fcs 112405.fcs_332150 15545 2020-04-24 N/A Hospitalized 63 F Asian N/A 47 7 Asian 1 1 0 0 0 0 0 1 0 1 1 0 0 38.17300 127575.12 114570 26595.03 24734 1734.726 420.9996 616.6493 1074.8611 2974.177 895.7813 718.3309 1041.2919 1460.7205 1416.2686 1483.615 1498.668 1327.409 290.9544 2259.455 648.9399
112405.fcs 20200528_COVID_ICS-B1 28-MAY-2020 15545 15545 DMSO A02 P2 15545_A2_A02_002.fcs 112405.fcs_332150 15545 2020-04-24 N/A Hospitalized 63 F Asian N/A 47 7 Asian 1 1 0 0 1 0 0 0 0 1 1 1 0 42.71600 78018.56 64151 19413.18 18663 2369.378 708.7510 1042.1219 266.5526 3009.480 678.0404 397.9678 248.2867 975.9810 830.8954 2528.764 1745.803 1237.203 1942.8108 2965.312 572.9742
112405.fcs 20200528_COVID_ICS-B1 28-MAY-2020 15545 15545 DMSO A02 P2 15545_A2_A02_002.fcs 112405.fcs_332150 15545 2020-04-24 N/A Hospitalized 63 F Asian N/A 47 7 Asian 1 1 0 0 1 0 0 0 0 1 1 0 0 56.74600 88374.32 70988 26498.46 26028 2404.623 551.2405 722.8105 872.4091 3106.231 877.5685 626.0448 907.5391 994.1599 1197.6831 2542.436 1090.572 1720.652 3415.9902 2976.415 919.1105
112405.fcs 20200528_COVID_ICS-B1 28-MAY-2020 15545 15545 DMSO A02 P2 15545_A2_A02_002.fcs 112405.fcs_332150 15545 2020-04-24 N/A Hospitalized 63 F Asian N/A 47 7 Asian 1 1 0 0 1 0 0 0 0 1 1 0 0 71.80400 56863.20 44923 18936.42 18159 2482.738 371.6367 953.5193 657.8126 3075.503 552.5499 979.8632 281.0577 952.5239 1170.0415 2217.235 1134.148 1207.746 1855.6052 2663.974 864.3541
112405.fcs 20200528_COVID_ICS-B1 28-MAY-2020 15545 15545 DMSO A02 P2 15545_A2_A02_002.fcs 112405.fcs_332150 15545 2020-04-24 N/A Hospitalized 63 F Asian N/A 47 7 Asian 1 1 1 0 0 0 0 0 0 1 1 1 0 91.68099 94648.12 81200 33363.63 29271 1689.272 748.1985 1529.1487 296.3163 2967.112 1113.0330 901.2040 745.3102 865.7036 1308.3577 2241.015 1723.847 1787.785 437.6838 2828.874 639.1063

Is there maybe one cytokine that is dominating the entire sample?
If one cytokine has very high background expression (and a generous gate), it could be gated positive in a lot of events.
The high number of events expressing this cytokine could lead to it dominating the data, so that most sampled events are positive for this noisy cytokine. It would drown out real signal from other cytokines.

cytokine_dominance <- cd8_compass_subsets_data %>% 
  group_by(Batch) %>% 
  summarise_at(cd8_cytokine_gates, sum) %>%
  t() %>%
  as.data.frame() %>%
  set_names(c("B1", "B2", "B3")) %>%
  rownames_to_column("Cytokine_Gate") %>%
  dplyr::filter(Cytokine_Gate != "Batch")
knitr::kable(cytokine_dominance)
Cytokine_Gate B1 B2 B3
/Time/LD-3+/1419-3+/S/Lymph/8+/107a 1997 4796 3114
/Time/LD-3+/1419-3+/S/Lymph/8+/154 0 0 0
/Time/LD-3+/1419-3+/S/Lymph/8+/IFNG 660 929 579
/Time/LD-3+/1419-3+/S/Lymph/8+/IL2 56 116 56
/Time/LD-3+/1419-3+/S/Lymph/8+/IL17 0 0 0
/Time/LD-3+/1419-3+/S/Lymph/8+/IL4513 1507 3444 2044
/Time/LD-3+/1419-3+/S/Lymph/8+/TNF 30 341 139
cytokine_dominance %>%
  pivot_longer(cols = starts_with("B"), names_to = "Batch", values_to = "Events_in_Gate") %>% 
  mutate(Cytokine = sub(".*4\\+\\/(.*)", "\\1", Cytokine_Gate)) %>% 
  ggplot(aes(Cytokine, Events_in_Gate, fill = Cytokine)) +
  theme_bw(base_size=18) +
  geom_bar(stat="identity") +
  facet_grid(. ~ Batch) +
  labs(title = "CD8 Run Cytokine Dominance by Batch")

Perform Dimensionality Reduction

cols_4_dimred <- c("CD3 ECD", "CD8b", "CD4",
                  "TNFa", "CD107a", 
                  "CD154", "IL2", "IL17a", 
                  "IL4/5/13", "IFNg", 
                  "CCR7", "CD45RA",
                  "CD38", "HLADR")
cd4.scaled_dimred_input <- cd8_compass_subsets_data %>% 
  dplyr::select(Batch, all_of(cols_4_dimred)) %>% 
  group_by(Batch) %>% 
  nest() %>% 
  ungroup() %>% 
  mutate(data = lapply(data, function(df) {as.data.frame(scale(as.matrix(df)))})) %>% 
  unnest(cols = c(data)) %>% 
  rename_at(vars(all_of(cols_4_dimred)),function(x) paste0(x,".scaled")) %>% 
  dplyr::select(-Batch)

cd8_compass_subsets_data <- cbind(cd8_compass_subsets_data, cd4.scaled_dimred_input)

# UMAP can take a long time, so there is a rerun_dimred switch
if(rerun_dimred) {
  print("Running UMAP")
  set.seed(date)
  print(Sys.time())
  cd8_compass_subsets_dimred_out <- cd8_compass_subsets_data %>% 
    # Run CD3, co-receptor, cytokine, memory, and activation markers through UMAP
    dplyr::select(all_of(paste0(cols_4_dimred, ".scaled"))) %>% 
    uwot::umap(spread = 9, min_dist = 0.02, n_threads = 7)
  print(Sys.time())
  cd8_compass_subsets_w_umap <- cbind(as.data.frame(cd8_compass_subsets_dimred_out) %>% 
    dplyr::rename(x.umap = V1, y.umap = V2),
    cd8_compass_subsets_data)
  if(save_output) {
    saveRDS(cd8_compass_subsets_w_umap, here::here(sprintf("out/UMAP/%s_ICS_CD8_COMPASS_Subsets_UMAP_Unsampled.rds", date)))
  }
} else {
  # Assuming UMAP results are already saved
  print("Loading saved UMAP run")
  cd8_compass_subsets_w_umap <- readRDS(here::here(sprintf("out/UMAP/%s_ICS_CD8_COMPASS_Subsets_UMAP_Unsampled.rds", date)))
}
## [1] "Loading saved UMAP run"

Plot UMAP results

Shuffle data frame rows so e.g. Batch 3 doesn’t dominate foreground

set.seed(date)
cd8_compass_subsets_w_umap <- cd8_compass_subsets_w_umap[sample(nrow(cd8_compass_subsets_w_umap), nrow(cd8_compass_subsets_w_umap)),]
# Arial font setup. Downloaded afms from https://github.com/microsoft/microsoft-r-open/tree/ec3fd89e5fb5794bd8149905c134ad801bb61800
Arial <- Type1Font(family = "Arial",
                   metrics = c(here::here("data/Arial_afm/ArialMT.afm"),
                               here::here("data/Arial_afm/ArialMT-Bold.afm"), 
                               here::here("data/Arial_afm/ArialMT-Italic.afm"),
                               here::here("data/Arial_afm/ArialMT-BoldItalic.afm")))
pdfFonts(Arial = Arial)

boolColorScheme <- c("FALSE" = "#E2E2E2", "TRUE" = "#023FA5")

cd8_compass_subsets_w_umap <- cd8_compass_subsets_w_umap %>% 
  mutate(cytokine_degree = rowSums(dplyr::select(., all_of(cd8_cytokine_gates))))
cd8_compass_subsets_w_umap <- cd8_compass_subsets_w_umap %>% 
  mutate(Cohort = factor(Cohort, levels = c("Non-hospitalized", "Hospitalized")),
         STIM = factor(STIM, levels = c("DMSO", "Spike 1", "Spike 2", "NCAP", "VEMP")))

stim_labs <- c("DMSO", "S1", "S2", "NCAP", "VEMP") 
names(stim_labs) <- c("DMSO", "Spike 1", "Spike 2", "NCAP", "VEMP")
cohort_labs <- c("Conv\nNon-Hosp", "Conv\nHosp")
names(cohort_labs) <- c("Non-hospitalized", "Hospitalized")

base_dimred_plot <- function(currentColumn, pointSize = 0.02, colorScheme = NA) {
  p <- ggplot(cd8_compass_subsets_w_umap, aes(x=x.umap, y=y.umap,
                                       colour=if(currentColumn %in% c("Batch", "SAMPLE ID")) {
                                         factor(!!as.name(currentColumn))
                                         } else {
                                           as.logical(!!as.name(currentColumn))
                                           })) +
    geom_point(shape=20, alpha=0.8, size=pointSize) +
    facet_grid(Cohort ~ STIM, switch="y",
               labeller = labeller(Cohort = cohort_labs, STIM = stim_labs)) +
    theme_bw() +
    theme(plot.title=element_text(hjust = 0.5, size=22, face="bold"),
          axis.text=element_blank(),
          axis.line=element_blank(),
          axis.ticks=element_blank(),
          axis.title=element_blank(),
          strip.text=element_text(face="bold", size=22),
          panel.grid.major = element_blank(),
          legend.title=element_text(face="bold", size=14),
          strip.text.x = element_text(margin = margin(0.15,0,0.15,0, "cm")))
  if(!anyNA(colorScheme)) {
    p <- p + scale_color_manual(values = colorScheme)
  }
  if(currentColumn %in% c("Batch", "SAMPLE ID")) {
    p <- p + labs(color = currentColumn) +
      guides(colour = guide_legend(override.aes = list(size=10))) +
      theme(legend.title = element_text(size=15),
            legend.text = element_text(size=15),
            legend.position = "bottom") +
      scale_colour_manual(values=as.character(iwanthue(length(unique(cd8_compass_subsets_w_umap[,currentColumn])))))
  } else {
    p <- p + theme(legend.position = "none")
  }
  p
}

Look for Batch and Patient sub-clustering

base_dimred_plot("Batch")

base_dimred_plot("SAMPLE ID")

Cytokine, Memory, and Activation expression localization

Now visualize the cytokine, memory, and activation expression localization

for(cg in cd8_gates_for_dimred) {
  print(base_dimred_plot(cg, colorScheme = boolColorScheme) +
    labs(title = sprintf("CD8+ COMPASS Subset+ UMAP\nColored by %s", sub(".*\\/([^(\\/)]+)", "\\1", cg))))
}

Color by degree

table(cd8_compass_subsets_w_umap$cytokine_degree)
## 
##     1     2     3 
## 17468   765   270
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc <- scale_colour_gradientn(colours = myPalette(100), limits=c(1, 6))

currentColumn <- "cytokine_degree"
pointSize <- 0.02
ggplot(cd8_compass_subsets_w_umap, aes(x=x.umap, y=y.umap, colour=!!as.name(currentColumn))) +
  geom_point(shape=20, alpha=0.8, size=pointSize) +
  facet_grid(Cohort ~ STIM, switch="y",
             labeller = labeller(Cohort = cohort_labs, STIM = stim_labs)) +
  labs(colour="Cytokine\nDegree",
       title="CD8+ COMPASS Subset+ UMAP\nColored by Cytokine Polyfunctionality") +
  theme_bw() +
  theme(plot.title=element_text(hjust = 0.5, size=22, face="bold"),
        axis.text=element_blank(),
        axis.line=element_blank(),
        axis.ticks=element_blank(),
        axis.title=element_blank(),
        strip.text=element_text(face="bold", size=22),
        panel.grid.major = element_blank(),
        legend.title=element_text(face="bold", size=15),
        legend.text = element_text(size=13),
        strip.text.x = element_text(margin = margin(0.15,0,0.15,0, "cm")),
        legend.position = "bottom") +
  sc

Unstratified UMAP plots

pointSize <- 0.02
# Theme settings
# Expand axis limits so contour lines don't get cut off near borders
gg_themes <- ggplot(cd8_compass_subsets_w_umap, aes(x=x.umap, y=y.umap)) +
  scale_x_continuous(limits=range(cd8_compass_subsets_w_umap$x.umap)*1.16) +
  scale_y_continuous(limits=range(cd8_compass_subsets_w_umap$y.umap)*1.11) +
  theme_bw() +
  theme(plot.title=element_text(hjust = 0.5, size=22, face="bold"),
        axis.text=element_blank(),
        axis.line=element_blank(),
        axis.ticks=element_blank(),
        axis.title=element_blank(),
        panel.grid = element_blank(),
        legend.position = "none",
        plot.margin = margin(1, 0, 0, 0, unit = "pt"))
# Hospitalization status contour plots
hosp_contour_plot <- gg_themes +
  geom_point(shape=20, alpha=0.8, size=pointSize, colour="#E2E2E2") +
  geom_density_2d(data=cd8_compass_subsets_w_umap %>% dplyr::filter(Cohort == "Hospitalized"),
                  colour="#363636", bins=12) + # "red"
  ggtitle("Hosp")

nonhosp_contour_plot <- gg_themes +
  geom_point(shape=20, alpha=0.8, size=pointSize, colour="#E2E2E2") +
  geom_density_2d(data=cd8_compass_subsets_w_umap %>% dplyr::filter(Cohort == "Non-hospitalized"),
                  colour="#363636", bins=12) + # "blue"
  ggtitle("Not-Hosp")
# Scaled mfi expression plots for memory, activation, and cytokine markers 
mfi_col_text_4plot <- c("TNFa" = "TNF", "CD107a" = "CD107", 
                  "CD154" = "CD154", "IL2" = "IL-2", "IL17a" = "IL17a", 
                  "IL4/5/13" = "IL-4/5/13", "IFNg" = "IFN-γ", 
                  "CCR7" = "CCR7", "CD45RA" = "CD45RA",
                  "CD38" = "CD38", "HLADR" = "HLA-DR")
plot_scaled_mfi_v2 <- function(currentColumn) {
  myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
  sc_mfi <- scale_colour_gradientn(colours = myPalette(100), oob=squish, limits=c(-2.5, 2.5))
  gg_themes +
    geom_point(aes(colour=cd8_compass_subsets_w_umap[,paste0(currentColumn, ".scaled")]),
               shape=20, alpha=0.8, size=pointSize) + # !!as.name(paste0(currentColumn, ".scaled")) doesn't work
    sc_mfi +
    ggtitle(mfi_col_text_4plot[[currentColumn]])
}

unstratified_mfi_plot_cols <- c("TNFa", "CD107a", 
                  "CD154", "IL2", "IL17a", 
                  "IL4/5/13", "IFNg", 
                  "CCR7", "CD45RA",
                  "CD38", "HLADR")
mfi_plots_unstrat <- lapply(unstratified_mfi_plot_cols, plot_scaled_mfi_v2)
names(mfi_plots_unstrat) <- unstratified_mfi_plot_cols
# Memory UMAP plot colored by gate membership (TEMRA, TEM, TCM, Naive)
getMemoryCategory <- function(CD45RA, CCR7) {
  if(CD45RA & CCR7) {
    "Naive"
  } else if(CD45RA & !CCR7) {
    "TEMRA"
  } else if(!CD45RA & CCR7) {
    "TCM"
  } else if(!CD45RA & !CCR7) {
    "TEM"
  } else {
    "Uncategorized"
  }
}

cd8_compass_subsets_w_umap$MemoryCategory <- pmap_chr(.l = list(cd8_compass_subsets_w_umap$`/Time/LD-3+/1419-3+/S/Lymph/8+/CD45RA+`,
                                                               cd8_compass_subsets_w_umap$`/Time/LD-3+/1419-3+/S/Lymph/8+/CCR7+`),
                                                     .f = getMemoryCategory)

memColorScheme <- c("TEM" = "#9E0142", "TEMRA" = "#BEE4A0", "TCM" = "#5E4FA2", "Naive" = "#FDBE6F")
mem_plot_colored_by_gate <- gg_themes +
  geom_point(aes(colour=cd8_compass_subsets_w_umap[,"MemoryCategory"]),
               shape=20, alpha=0.8, size=pointSize) +
  ggtitle("Memory") +
  scale_color_manual(values = memColorScheme)
# + theme(legend.position = "right") + guides(colour = guide_legend(override.aes = list(size=9)))
# Activation plots: color by gate membership, one plot per marker
boolColorScheme <- c("0" = "#E2E2E2", "1" = "#363636")
hladr_bool_plot <- gg_themes +
  geom_point(aes(colour=factor(cd8_compass_subsets_w_umap[,"/Time/LD-3+/1419-3+/S/Lymph/HLADR+"], levels = c(1, 0))),
               shape=20, alpha=0.8, size=pointSize) +
  scale_color_manual(values = boolColorScheme, labels=c("1"="Expressed", "0"="Not Expressed")) +
  ggtitle("HLA-DR")
cd38_bool_plot <- gg_themes +
  geom_point(aes(colour=factor(cd8_compass_subsets_w_umap[,"/Time/LD-3+/1419-3+/S/Lymph/CD38+"], levels = c(1, 0))),
               shape=20, alpha=0.8, size=pointSize) +
  scale_color_manual(values = boolColorScheme, labels=c("1"="Expressed", "0"="Not Expressed")) +
  ggtitle("CD38")
# factor_colors = hsv((seq(0, 1, length.out = 4 + 1)[-1] +
#                          0.2)%%1, 0.7, 0.95)
# 
# stim_colors <- c("Spike 1" = "#fc68be", "Spike 2" = "#d94e09", "NCAP" = "#6B49F2", "VEMP" = "#D0F249", "DMSO" = "#91570a") # heatmap colors
# stim_colors <- c("Spike 1" = "#49F2BF", "Spike 2" = "#F2497C", "NCAP" = "#6B49F2", "VEMP" = "#D0F249", "DMSO" = "#91570a")
stim_abbreviations = c("Spike 1" = "S1", "Spike 2" = "S2", "NCAP" = "NCAP", "VEMP" = "VEMP", "DMSO" = "DMSO")

plot_stim_contour_plot <- function(currentStim) {
  gg_themes +
    geom_point(shape=20, alpha=0.8, size=pointSize, colour="#E2E2E2") +
    geom_density_2d(data=cd8_compass_subsets_w_umap %>% dplyr::filter(STIM == currentStim),
                    colour="#363636", bins=12) + # stim_colors[[currentStim]]
    ggtitle(stim_abbreviations[[currentStim]])
}

stims <- c("DMSO", "Spike 1", "Spike 2", "NCAP", "VEMP")
stim_plots <- lapply(stims, plot_stim_contour_plot)
names(stim_plots) <- stims
# Unstratified PolyF plot
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc_polyf <- scale_colour_gradientn(colours = myPalette(100), limits=c(1, 6))
polyf_plot_unstrat <- gg_themes +
  geom_point(aes(colour=cd8_compass_subsets_w_umap[,"cytokine_degree"]), shape=20, alpha=0.8, size=pointSize) +
  sc_polyf +
  ggtitle("PolyF")
# Extract legends
mfi_legend <- as_ggplot(get_legend(mfi_plots_unstrat$TNFa +
                                     theme(legend.position="bottom",
                                             legend.title.align = 0.5) +
                                     labs(colour="Scaled Expression") +
                                     guides(colour=guide_colorbar(title.position = "top"))))
polyf_legend <- as_ggplot(get_legend(polyf_plot_unstrat +
                                       theme(legend.position="bottom",
                                             legend.title.align = 0.5) +
                                       labs(colour="PolyF") +
                                       guides(colour=guide_colorbar(title.position = "top"))))
mem_legend <- as_ggplot(get_legend(mem_plot_colored_by_gate +
                                     theme(legend.position = "right",
                                           legend.title.align = 0.5) +
                                     guides(colour = guide_legend(override.aes = list(size=9))) +
                                     labs(colour="Memory")))
bool_legend <- as_ggplot(get_legend(hladr_bool_plot +
                                     theme(legend.position = "right",
                                           legend.title.align = 0.5) +
                                     guides(colour = guide_legend(override.aes = list(size=9))) +
                                     labs(colour="Gate Membership")))

Put it all together

print(
  # Top row
  (hosp_contour_plot | nonhosp_contour_plot | plot_spacer() |
  stim_plots$`Spike 1` | stim_plots$`Spike 2` | plot_spacer() |
  polyf_plot_unstrat | plot_spacer() | mfi_plots_unstrat$IFNg) /
  # Second row
  (mem_plot_colored_by_gate | plot_spacer() | plot_spacer() |
  stim_plots$NCAP | stim_plots$VEMP | plot_spacer() |
  mfi_plots_unstrat$TNFa | mfi_plots_unstrat$IL2 | mfi_plots_unstrat$`IL4/5/13`) /
  # Third row
  (hladr_bool_plot | cd38_bool_plot | plot_spacer() |
  stim_plots$DMSO | plot_spacer() | plot_spacer() |
  mfi_plots_unstrat$IL17a | mfi_plots_unstrat$CD107a | mfi_plots_unstrat$CD154 )
)

if(save_output) {
  png(file=here::here(sprintf("out/UMAP/%s_CD8_COMPASS_Subsets_UMAP.png",
                              date)),
      width=18, height=9, units="in", res=300)
    print(
      # Top row
      (hosp_contour_plot | nonhosp_contour_plot | plot_spacer() |
      stim_plots$`Spike 1` | stim_plots$`Spike 2` | plot_spacer() |
      polyf_plot_unstrat | plot_spacer() | mfi_plots_unstrat$IFNg) /
      # Second row
      (mem_plot_colored_by_gate | plot_spacer() | plot_spacer() |
      stim_plots$NCAP | stim_plots$VEMP | plot_spacer() |
      mfi_plots_unstrat$TNFa | mfi_plots_unstrat$IL2 | mfi_plots_unstrat$`IL4/5/13`) /
      # Third row
      (hladr_bool_plot | cd38_bool_plot | plot_spacer() |
      stim_plots$DMSO | plot_spacer() | plot_spacer() |
      mfi_plots_unstrat$IL17a | mfi_plots_unstrat$CD107a | mfi_plots_unstrat$CD154 )
    )
  dev.off()
  
  png(file=here::here(sprintf("out/UMAP/%s_CD8_COMPASS_Subsets_UMAP_MFI_Legend.png",
                            date)),
    width=3, height=1.5, units="in", res=300)
  print(mfi_legend)
  dev.off()
  
  png(file=here::here(sprintf("out/UMAP/%s_CD8_COMPASS_Subsets_UMAP_PolyF_Legend.png",
                            date)),
    width=3, height=1.5, units="in", res=300)
  print(polyf_legend)
  dev.off()
  
  png(file=here::here(sprintf("out/UMAP/%s_CD8_COMPASS_Subsets_UMAP_Memory_Legend.png",
                            date)),
    width=2, height=3, units="in", res=300)
  print(mem_legend)
  dev.off()
  
  png(file=here::here(sprintf("out/UMAP/%s_CD8_COMPASS_Subsets_UMAP_GateMembership_Legend.png",
                          date)),
  width=2, height=3, units="in", res=300)
  print(bool_legend)
  dev.off()
  
  # cairo_pdf(file=here::here(sprintf("out/UMAP/%s_CD8_COMPASS_Subsets_UMAP.cairo.pdf",
  #                             date)),
  #     width=12, height=9, onefile = TRUE, bg = "transparent", family = "Arial")
  # print(x)
  # dev.off()
}
## png 
##   2